---
title: "CES dataset"
author: "Miles Quarterman"
date: "2023-08-17"
output: html_document
---

```{r setup, include=FALSE}
# Set the default chunk option echo = TRUE for the chunks that follow.
knitr::opts_chunk$set(echo = TRUE)
```

## R Markdown
## set up
```{r}
# Packages ----
# tidyverse attaches dplyr and ggplot2 (ggplot2 is used for the bar chart
# later in this document); the explicit dplyr call is redundant but harmless.
library(dplyr)
# Install writexl only when it is missing. An unconditional install.packages()
# re-downloads the package on every run and can abort a knit when no CRAN
# mirror is configured for the non-interactive session, so a mirror is given
# explicitly for the fallback install.
if (!requireNamespace("writexl", quietly = TRUE)) {
  install.packages("writexl", repos = "https://cloud.r-project.org")
}
library("writexl")
library(tidyverse)

# Inputs ----
# Both files are read relative to the working directory of the knit.
ces_dataset <- read.csv("modified_policy_outcomes_and_notes.xlsx - Sheet1.csv")
comparative_agendas <- read.csv("rcv_voteview_v3_1 (2).csv")

# Keep only the identifying columns and the topic codes from the roll-call
# file.
# NOTE(review): the merge in the "combining datasets" chunk joins on
# "bill_id", which is not among the columns kept here -- confirm whether
# bill_id should be selected as well (or whether the join key should be
# bill_number).
comparative_agendas_new <- subset(
  comparative_agendas,
  select = c(
    year, bill_number, vote_question, pap_majortopic, pap_subtopic,
    majortopic, subtopic
  )
)
```
## combining datasets
```{r}
# Join the CES table with the reduced roll-call table on bill_id. merge()
# defaults to all = FALSE, so only rows whose key occurs in both inputs are
# kept.
# NOTE(review): comparative_agendas_new is built in the setup chunk without a
# bill_id column, so this key may be missing on the right-hand side -- confirm.
ces_dataset_new <- merge(
  x = ces_dataset,
  y = comparative_agendas_new,
  by = "bill_id"
)
```
## Clean up
```{r}
# Keep only the rows whose vote_question is one of the question types listed
# below; every other row (including rows with a missing vote_question) is
# dropped.
ces_dataset_new <- subset(
  ces_dataset_new,
  vote_question %in% c(
    "On Passage of the Bill",
    "On Agreeing to the Resolution",
    "On the Nomination",
    "On Passage",
    "On Motion to Suspend the Rules and Pass",
    "Passage, Objections of the President Not Withstanding",
    "On Motion to Suspend the Rules and Pass, as Amended",
    "On Agreeing to the Conference Report",
    "On the Cloture Motion",
    "On Agreeing to Article I of the Resolution",
    "On Agreeing to Article II of the Resolution"
  )
)
```
## Export new dataframe
```{r}
# Save the merged and filtered data frame as an Excel workbook in the working
# directory.
writexl::write_xlsx(
  x = ces_dataset_new,
  path = "ces_CAP-code.xlsx"
)
```
## Creating bar graph
```{r}
# Tabulate how often each major topic code occurs among the coded CES
# questions. as.data.frame(table(...)) yields the columns Var1 (the code, as a
# factor) and Freq (the count).
CES_questions <- read.csv("CES CAP Coding All Questions.csv")
CES_questions_topics <- as.data.frame(table(CES_questions$majortopic))

# Share of each topic in percent.
# NOTE(review): the denominator 144 is hard-coded, presumably the total number
# of questions -- confirm it still matches the input file.
CES_questions_topics$percent_freq <- 100 * (CES_questions_topics$Freq / 144)

# Short display labels, assigned by position to the rows of the table (which
# table() orders by sorted topic code). The check makes a mismatch between the
# number of observed codes and the number of labels fail with a clear message.
topic_labels <- c(
  "Econ", "Civ Rights", "Health", "Ag", "Labor", "Edu", "Env", "Immigr",
  "Trans", "Crime", "SW", "Housing", "Dom Comm", "Def", "Tech", "Frgn Trade",
  "Int Aff", "Govt Ops"
)
stopifnot(nrow(CES_questions_topics) == length(topic_labels))
CES_questions_topics$Topics <- topic_labels

# Convert the factor of topic codes to real numbers. Going through
# as.character() keeps the code values themselves; merely overwriting the
# class attribute would leave the factor's internal level indices in place.
# Assumes every majortopic value parses as a number.
CES_questions_topics$Var1 <- as.numeric(as.character(CES_questions_topics$Var1))

# Append zero-count rows for the topics that are not among the labels above so
# they still show up on the chart.
# NOTE(review): Energy is entered with code 7; the Comparative Agendas
# codebook appears to use 7 = Environment and 8 = Energy -- confirm the code.
missing_topics <- data.frame(
  Var1 = c(7, 21, 23),
  Freq = 0,
  percent_freq = 0,
  Topics = c("Energy", "Publ Lands", "Culture"),
  stringsAsFactors = FALSE
)
CES_questions_topics <- rbind(CES_questions_topics, missing_topics)

# Bar chart of the percentage share per topic label.
ggplot(CES_questions_topics, aes(x = Topics, y = percent_freq)) +
  geom_col() +
  labs(
    title = "Topic Distribution of CES Questions",
    x = "Topics",
    y = "Frequency (%)"
  )

# Export the table behind the chart.
write_xlsx(CES_questions_topics, "CES_topics.xlsx")
```